global root_dir = "`1'"

include "$root_dir/code/config/config.do"


cap noi log using ${log_dir}/load_data_WIOD.log, replace name(dat)

*Handle empty arguments
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")

if "$arg1" != "" {
    global weight_category "$arg1"
    di "Weight category: ${weight_category}"
}

if "$arg2" != "" {
    global weight_versions "$arg2"
    di "Weight versions: ${weight_versions}"
}

if "$arg3" != "" {
    global weight_window "$arg3"
    di "Weight window: ${weight_window}"
}

if "$arg4" != "" {
	global wtype "$arg4"
}
di "${wtype}"
capture noi {

set more off

***********************************************
*************** LOAD WIOD DATA  ***************
***********************************************
* Some Notes: 

* We import here the Import World IO Database wage, deflator and eployment 
* data (update of Euklems).
* The WIOD will be our baseline and EUklems data is used as robustness check
* The WIOD contains data on average low-skill, high-skill 
* wages from 1995 to 2009-2011 (depending on country).  

* There are some complications regarding exchange rates when we want to let
* the WIOD speak to the EUklems. 

* WIOD
* Is much more complicated than the EUklems. It is not entirely clear how it is 
* done, but I believe it is the following: For a country like Malta, which joined 
* in 2008 we have Euros from 2008 onwards, but converted to Euros before 2008 
* using the year-by-year-exchange rate. That's fine for the conversion to dollars, 
* but introduces a complication when we want to divide by CPI which will be 
* measured in whatever currency the country had before the conversion. We want to 
* convert WIOD data along the same lines as the EUklems data. I have inferred 
* from the EXR_WIOD_SEP12 file that everything has been converted to the euro/ecu 
* exchange rate going back to 1995. Hence, immediately when we import the WIOD 
* data we will convert it along the same lines as EUklems. It really won't matter 
* much for the larger countries, which were basically in fixed exchange rate 
* systems leading up, but will matter a bit for some of the smaller countries.

* We seek to include sectoral wages. EUklems 2008 uses a system that assigns D 
* to manufacturing and 15-37 as subcategories (NACE Rev 1). WIOD instead uses 
* C10-C33 and labels manufacturing C (NACE Rev 2). There is a correspondence 
* between the two. It is not exact, but we will use it. 


***********************
* Import WIOD data - Update of Euklems data* 
***********************

*Three steps:
*1. Import Data
*2. Renormalize using exchange rate such that WIOD can speak to EUklems
*3. Generate variables of interest for manufacturing sector "D". 

* We construct manufacturing from first pricinples by aggregating subcategories: 
* 15-37 (that's how it's done in Euklems).

* All files and variables are denoted _WIOD to distinguish them from EUklems data.


*****************************************
* 1. Import the WIOD, reshape and rename.
*****************************************
* WIOD Socio Economic Accounts, basic data on Output and employment | WIOD database 
* July 2014 release | https://www.rug.nl/ggdc/valuechain/wiod/wiod-2013-release
import excel using ${mow_data_raw}/WIOD/WIOD_SEA_July14.xlsx, sheet("DATA") firstrow clear 
reshape long _, i(Country Variable Code) j(year)
drop Description
reshape wide _, i(Country year Code) j(Variable) string
foreach var in H_EMP H_HS H_LS H_MS LAB LABHS LABLS LABMS VA VA_QI VA_P {
    rename _`var' `var'
    replace `var'="" if `var'=="NA"
    destring `var', replace
    rename `var' `var'_WIOD
}
save ${tmp_dir}/temp.dta, replace

**********************************************************************************
* 2. Renormalize so that they have Euros from the first date of Euro inclusion and 
* the Euro by the final exchange rate from before. 
**********************************************************************************

* 2.a. Import the euro_conversion rates and make it ready to merge with the WIOD data. 
* exchange rates Former national Currencies to Euro/ECU | Eurostat 
* https://ec.europa.eu/eurostat/databrowser/view/ert_h_eur_a__custom_12589563/default/table?lang=en | 
import delimited ${mow_data_raw}/exchange_rates/ert_h_eur_a_1_Data.csv, varnames(1) clear
rename time year

*Only keep the average exchange rate of the year. 
keep if statinfo=="Average"
gen Country=""
replace Country="aut" if currency=="Austrian schilling"
replace Country="bel" if currency=="Belgian franc"
replace Country="cyp" if currency=="Cyprus pound"
replace Country="deu" if currency=="German mark"
replace Country="est" if currency=="Estonian Kroon "
replace Country="esp" if currency=="Spanish peseta"
replace Country="fin" if currency=="Finnish markka"
replace Country="fra" if currency=="French franc"
replace Country="grc" if currency=="Greek drachma"
replace Country="irl" if currency=="Irish pound"
replace Country="ita" if currency=="Italian lira"
replace Country="ltu" if currency=="Lithuanian litas"
replace Country="lux" if currency=="Luxembourg franc"
replace Country="lva" if currency=="Latvian lats"
replace Country="mlt" if currency=="Maltese lira "
replace Country="nld" if currency=="Dutch guilder"
replace Country="prt" if currency=="Portuguese escudo"
replace Country="svn" if currency=="Slovenian tolar "
replace Country="svk" if currency=="Slovak koruna"
replace Country=upper(Country)
rename value euro_conversion
keep year Country euro_conversion

* 2.b. Merge values of euro_conversion with the WIOD data. And then clean up and renormalize. 
merge 1:m Country year using ${tmp_dir}/temp.dta

* Drop the extra year. 
drop if _merge==1

* Fix comma issue with Italy first. 
replace euro_conversion="1936.27" if year>=1999 & Country=="ITA"
replace euro_conversion="1943.64" if year==1998 & Country=="ITA"
replace euro_conversion="1929.30" if year==1997 & Country=="ITA"
replace euro_conversion="1958.96" if year==1996 & Country=="ITA"
replace euro_conversion="2130.14" if year==1995 & Country=="ITA"
destring euro_conversion, replace
replace euro_conversion=1 if euro_conversion==.

* Renormalize.
* Really a matter of multiplying with the change the ecu/LCU has undergone. 
* Multiply with the scale defined below in order to use the same (last) 
* conversion rate for all years before the country joined the Euro.  
sort Country Code year
by Country Code: gen scale=euro_conversion/euro_conversion[_N]
replace LAB_WIOD=LAB_WIOD*scale
replace VA_WIOD=VA_WIOD*scale
replace VA_P_WIOD=VA_P_WIOD*scale
drop scale euro_conversion


* Make sure they are at the same level as the EUklems data. 
replace LABLS_WIOD=LABLS_WIOD*100
replace LABMS_WIOD=LABMS_WIOD*100
replace LABHS_WIOD=LABHS_WIOD*100
replace H_LS_WIOD=H_LS_WIOD*100
replace H_MS_WIOD=H_MS_WIOD*100
replace H_HS_WIOD=H_HS_WIOD*100

* And create variables of interest over each industry code. 
*(note: LAB is labor compensation, while LABLS, LABMS, LABHS are the shares of labor compensation to low-, middle- and high-skill workers, respectively.
* Likewise, H_EMP are total hours worked and H_LS, H_MS, H_HS are the shares of hours worked by low-, middle- and high-skill workers.)
gen ls_wage_WIOD=(LAB_WIOD*LABLS_WIOD)/(H_EMP_WIOD*H_LS_WIOD)
gen ms_wage_WIOD=(LAB_WIOD*LABMS_WIOD)/(H_EMP_WIOD*H_MS_WIOD)
gen hs_wage_WIOD=(LAB_WIOD*LABHS_WIOD)/(H_EMP_WIOD*H_HS_WIOD)
gen lms_wage_WIOD=(LAB_WIOD*(LABLS_WIOD + LABMS_WIOD))/(H_EMP_WIOD* (H_MS_WIOD + H_LS_WIOD))
gen av_wage_WIOD=LAB_WIOD/H_EMP_WIOD
gen va_emp_WIOD=VA_WIOD/H_EMP_WIOD
gen LABLS_TOT_WIOD=LAB_WIOD*LABLS_WIOD
save ${tmp_dir}/temp_WIOD.dta, replace

****************************
* 3. Generate "D" for WIOD.
****************************
*  The most interesting industry is manufacturing, which is not explicitly 
* included in WIOD. It is, however, the sum of a distinct set of categories.
* (see EU KLEMS GROWTH AND PRODUCTIVITY ACCOUNTS Version 1.0 PART I Methodology
* March 2007) Thus, create the variables of intrest also over those industry 
* codes together and add them to the other dataset containing the variables 
* of interest for each industry code separately. 

keep if Code=="15t16" | Code=="17t18" | Code=="19" | Code=="20" | Code=="21t22" | Code=="23" | Code=="24" | Code=="25" | Code=="26" | Code=="27t28" | Code=="29" | Code=="30t33" | Code=="34t35" | Code=="36t37"
* We are only doing this for LAB LABHS LABLS LABMS H_HS H_LS H_MS H_EMP and 
* deflator (VA_P).
sort Country year 
drop LABLS_TOT_WIOD

* Calculate the nominator and denominator for our variables of interest by 
* the diffrent industry code. 
gen LABLS_TOT_WIOD = LAB_WIOD * LABLS_WIOD / 100
label variable LABLS_TOT_WIOD "Total labor compensation to low-skill workers"
gen LABMS_TOT_WIOD = LAB_WIOD * LABMS_WIOD / 100
label variable LABMS_TOT_WIOD "Total labor compensation to middle-skill workers"
gen LABLMS_TOT_WIOD = LAB_WIOD * (LABLS_WIOD + LABMS_WIOD) / 100
label variable LABLMS_TOT_WIOD "Total labor compensation to low-and middle-skill workers"
gen LABHS_TOT_WIOD = LAB_WIOD * LABHS_WIOD / 100
label variable LABHS_TOT_WIOD "Total labor compensation to high-skill workers"
gen H_HS_TOT_WIOD = H_EMP_WIOD * H_HS_WIOD / 100
label variable H_HS_TOT_WIOD "Hours worked for high-skill"
gen H_MS_TOT_WIOD = H_EMP_WIOD * H_MS_WIOD / 100
label variable H_MS_TOT_WIOD "Hours worked for middle-skill"
gen H_LMS_TOT_WIOD = H_EMP_WIOD * (H_LS_WIOD + H_MS_WIOD) / 100
label variable H_LMS_TOT_WIOD "Hours worked for low- and middle-skill"
gen H_LS_TOT_WIOD = H_EMP_WIOD * H_LS_WIOD / 100
label variable H_LS_TOT_WIOD "Hours worked for low-skill"

* Deflator - Value weighted change in VA_P for each of these industries. 
* Quantity Index - Value weighted change in VA_QI for each of these industries.
sort Country Code year
by Country Code: gen VA_P_WIOD_g=VA_P_WIOD[_n]/VA_P_WIOD[_n-1]-1 if _n>1
by Country Code: gen VA_QI_WIOD_g=VA_QI_WIOD[_n]/VA_QI_WIOD[_n-1]-1 if _n>1
* Weight the Deflator and Quantitiy Index. 
bysort Country year: egen VA_WIOD_tot=total(VA_WIOD)
gen VA_WIOD_weight=VA_WIOD/VA_WIOD_tot
gen VA_P_g_weight_WIOD=VA_P_WIOD_g*VA_WIOD_weight
gen VA_QI_g_weight_WIOD=VA_QI_WIOD_g*VA_WIOD_weight
* Sum the values over all the manufacturing industires. 
collapse (sum) LAB_WIOD LABLS_TOT_WIOD LABMS_TOT_WIOD LABLMS_TOT_WIOD LABHS_TOT_WIOD H_EMP_WIOD H_HS_TOT_WIOD H_MS_TOT_WIOD H_LMS_TOT_WIOD H_LS_TOT_WIOD VA_P_g_weight VA_QI_g_weight VA_WIOD, by(Country year)

* Zero values are set equal to missing.
foreach var in LAB_WIOD LABLS_TOT_WIOD LABMS_TOT_WIOD LABLMS_TOT_WIOD LABHS_TOT_WIOD H_EMP_WIOD H_HS_TOT_WIOD H_MS_TOT_WIOD H_LMS_TOT_WIOD H_LS_TOT_WIOD VA_P_g_weight_WIOD VA_QI_g_weight VA_WIOD {
    replace `var'=. if `var'==0
}

* Assign the manufacturing code to these variables. 
gen Code="D"
* Finally create the variables of interest over whole manufacturing.  
gen ls_wage_WIOD=LABLS_TOT_WIOD/H_LS_TOT_WIOD
gen ms_wage_WIOD=LABMS_TOT_WIOD/H_MS_TOT_WIOD
gen lms_wage_WIOD=LABLMS_TOT_WIOD/H_LMS_TOT_WIOD
gen hs_wage_WIOD=LABHS_TOT_WIOD/H_HS_TOT_WIOD
gen av_wage_WIOD=LAB_WIOD/H_EMP_WIOD
gen va_emp_WIOD=VA_WIOD/H_EMP_WIOD

* We also need H_LS H_MS H_HS LABLS LABMS LABHS over all manufacturing industies together. 
gen H_LS_WIOD=H_LS_TOT_WIOD/H_EMP_WIOD
gen H_MS_WIOD=H_MS_TOT_WIOD/H_EMP_WIOD
gen H_LMS_WIOD=H_LMS_TOT_WIOD/H_EMP_WIOD
gen H_HS_WIOD=H_HS_TOT_WIOD/H_EMP_WIOD

gen LABLS_WIOD=LABLS_TOT_WIOD/LAB_WIOD
gen LABMS_WIOD=LABMS_TOT_WIOD/LAB_WIOD
gen LABLMS_WIOD=LABLMS_TOT_WIOD/LAB_WIOD
gen LABHS_WIOD=LABHS_TOT_WIOD/LAB_WIOD

*Update VA_P VA_QI too and 1995 = 100 again. 
sort Country Code year
gen VA_P_WIOD=100
gen VA_QI_WIOD=100
by Country Code: replace VA_P_WIOD=VA_P_WIOD[_n-1]*(1+VA_P_g_weight) if _n>1
by Country Code: replace VA_QI_WIOD=VA_QI_WIOD[_n-1]*(1+VA_QI_g_weight) if _n>1
drop VA_P_g_weight_WIOD VA_QI_g_weight_WIOD


* Combine the datasets of the variables of intrests for all single industries 
* and the aggregated manufacturing industry again. 
append using ${tmp_dir}/temp_WIOD.dta
drop _*
rename Country country
rename Code code
replace country=lower(country)
replace country="usa-sic" if country=="usa"
replace country="ger" if country=="deu"

*  VA_P_WIOD and VA_QI_WIOD have to equal 100 in 2005.
* Put this in the end because otherwise for D
* we do not have 100 in 2005 and we forgot to set 
* VA_QI_WIOD to 100 in 2005 before. 
sort country code year
by country code: egen mgo=total(VA_P_WIOD) if year==2005
by country code: egen mgo2=total(mgo)
replace VA_P_WIOD=VA_P_WIOD/mgo2*100
drop mgo*

sort country code year
by country code: egen mgo=total(VA_QI_WIOD) if year==2005
by country code: egen mgo2=total(mgo)
replace VA_QI_WIOD=VA_QI_WIOD/mgo2*100
drop mgo*

label variable year "Year"
label variable country "Country"
label variable code "Sector code"

label variable LAB_WIOD "Labor compensation"
label variable LABLS_WIOD "Labor compensation share to low-skill workers"
label variable LABHS_WIOD "Labor compensation share to high-skill workers"
label variable LABLMS_WIOD "Labor compensation share to low- and middle-skill workers"
label variable LABMS_WIOD "Labor compensation to middle-skill workers"

label variable H_EMP_WIOD "Hours worked"
label variable H_LMS_TOT_WIOD "Hours worked for low- and middle-skill"
label variable H_MS_TOT_WIOD "Hours worked for middle-skill"
label variable H_LS_TOT_WIOD "Hours worked for low-skill"
label variable H_HS_TOT_WIOD "Hours worked for high-skill"

label variable H_MS_WIOD "Share of hours worked for middle-skill"
label variable H_LS_WIOD "Share of hours worked for low-skill"
label variable H_HS_WIOD "Share of hours worked for high-skill"
label variable H_LMS_WIOD "Hours worked for low- and middle-skill"

label variable LABLS_TOT_WIOD "Total labor compensation to low-skill workers"
label variable LABHS_TOT_WIOD "Total labor compensation to high-skill workers"
label variable LABMS_TOT_WIOD "Total labor compensation to middle-skill workers"
label variable LABLMS_TOT_WIOD "Total labor compensation to low- and middle-skill workers"

label variable VA_WIOD "Value added"
label variable va_emp_WIOD "Value added per employee"
label variable VA_P_WIOD "Price levels of gross value added"
label variable VA_QI_WIOD "Value added quantity index"

label variable ls_wage_WIOD "Low-skill wage"
label variable hs_wage_WIOD "High-skill wage"
label variable ms_wage_WIOD "Middle-skill wage"
label variable lms_wage_WIOD "Low- and middle-skill wage"
label variable av_wage_WIOD "Average wage"


save ${mow_data_proc}/WIOD_import.dta, replace
cap erase ${tmp_dir}/temp_WIOD.dta

}
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

cap log close dat